Load raw data
if(!file.exists('./../Data/Voineagu.RData')){
load('~/MSc/Dissertation/Data/RDatas/max_var_between/LumiBatch_Voineagu.RData')
# Remove cerebellum samples
LumiBatch_v = LumiBatch_v[,colnames(LumiBatch_v) %in%
pData(LumiBatch_v)$GSM[pData(LumiBatch_v)$Cortex.area!='cerebellum']]
pData(LumiBatch_v) = pData(LumiBatch_v)[pData(LumiBatch_v)$Cortex.area != 'cerebellum',]
datExpr = exprs(LumiBatch_v)
datMeta = pData(LumiBatch_v)
# Perform Differential Expression Analysis
SAM_fit = SAM(datExpr, as.factor(datMeta$Disease.status), resp.type = 'Two class unpaired',
geneid = rownames(datExpr), random.seed = 1234, logged2 = FALSE, fdr.output = 1)
genes_up = SAM_fit$siggenes.table$genes.up %>% data.frame
genes_down = SAM_fit$siggenes.table$genes.lo %>% data.frame
DE_info = genes_up %>% bind_rows(genes_down)
# Load SFARI information
SFARI_genes = read_csv('./../../Gandal/RNAseq/working_data/SFARI_genes_01-15-2019.csv')
ilmn_gene_map = mapIds(illuminaHumanv4.db, key=rownames(datExpr),
column=c('SYMBOL'), keytype='PROBEID', multiVals='filter')
SFARI_genes = data.frame('ID'=names(ilmn_gene_map), 'gene-symbol'=unname(ilmn_gene_map)) %>%
right_join(SFARI_genes, by=c('gene.symbol'='gene-symbol'))
# Add neuronal annotations
GO_annotations = read.csv('./../../Gandal/RNAseq/working_data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
distinct(ensembl_gene_id) %>%
mutate('Neuronal' = 1)
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org')
ensembl_gene_map = getBM(attributes=c('ensembl_gene_id', 'hgnc_symbol'), filters=c('ensembl_gene_id'),
values=GO_neuronal$ensembl_gene_id, mart=mart)
ensembl_ilmn_gene_map = data.frame('ID' = names(ilmn_gene_map), 'gene.symbol'=unname(ilmn_gene_map)) %>%
full_join(ensembl_gene_map, by=c('gene.symbol'='hgnc_symbol'))
GO_neuronal = GO_neuronal %>% left_join(ensembl_ilmn_gene_map, by='ensembl_gene_id') %>%
filter(!is.na(ID))
save(datExpr, datMeta, GO_neuronal, SFARI_genes, DE_info, LumiBatch_v,
file='./../Data/Voineagu.RData')
rm(gpl, gse, GO_annotations, mart, ensembl_gene_map, ilmn_gene_map, gene_names,
genes_up, genes_down, SAM_fit, ensembl_ilmn_gene_map)
} else load('./../Data/Voineagu.RData')
datExpr_backup = datExpr
maxFC = max(as.numeric(DE_info$Fold.Change[DE_info$Fold.Change!='Inf']))
DE_info = DE_info %>% mutate('logFC'=ifelse(Fold.Change=='Inf', log2(maxFC+1),
log2(as.numeric(Fold.Change)+1)),
'ID' = Gene.Name)
datMeta = datMeta %>% mutate('Diagnosis_' = Disease.status)
rm(maxFC)
Number of genes and samples:
dim(datExpr)
## [1] 15809 58
Gene count by SFARI score of remaining genes:
table(SFARI_genes$`gene-score`)
##
## 1 2 3 4 5 6
## 25 62 195 454 175 25
Gene count by Diagnosis and Brain lobe:
t(table(datMeta$Cortex.area, datMeta$Diagnosis_))
##
## frontal temporal
## autism 16 13
## control 16 13
Relation between SFARI scores and Neuronal functional annotation:
There is one gene in the SFARI list that has score both 3 and 4
Higher SFARI scores have a higher percentage of genes with neuronal functional annotation (makes sense)
Neuronal_SFARI = data.frame('ID'=rownames(datExpr), 'Neuronal'=rownames(datExpr) %in% GO_neuronal$ID) %>%
left_join(SFARI_genes, by='ID')
tbl = table(Neuronal_SFARI$`gene-score`, Neuronal_SFARI$Neuronal, useNA='ifany') %>% t %>% as.data.frame.matrix
rownames(tbl) = c('Non-Neuronal','Neuronal')
tbl
## 1 2 3 4 5 6 NA
## Non-Neuronal 12 40 136 343 128 20 14005
## Neuronal 8 16 42 72 39 4 945
tbl = round(sweep(tbl, 2, colSums(tbl), `/`)*100, 2)
tbl
## 1 2 3 4 5 6 NA
## Non-Neuronal 60 71.43 76.4 82.65 76.65 83.33 93.68
## Neuronal 40 28.57 23.6 17.35 23.35 16.67 6.32
rm(Neuronal_SFARI, tbl)
make_ASD_vs_CTL_df = function(datExpr){
datExpr_ASD = datExpr %>% data.frame %>% dplyr::select(which(datMeta$Diagnosis_=='autism'))
datExpr_CTL = datExpr %>% data.frame %>% dplyr::select(which(datMeta$Diagnosis_=='control'))
ASD_vs_CTL = data.frame('ID'=as.character(rownames(datExpr)),
'mean' = rowMeans(datExpr), 'sd' = apply(datExpr, 1, sd),
'mean_ASD'=rowMeans(datExpr_ASD), 'mean_CTL'=rowMeans(datExpr_CTL),
'sd_ASD'=apply(datExpr_ASD,1,sd), 'sd_CTL'=apply(datExpr_CTL,1,sd)) %>%
mutate('mean_diff'=mean_ASD-mean_CTL, 'sd_diff'=sd_ASD-sd_CTL) %>%
left_join(SFARI_genes, by='ID') %>%
dplyr::select(ID, mean, sd, mean_ASD, mean_CTL, mean_diff, sd_ASD, sd_CTL, sd_diff, `gene-score`) %>%
mutate('Neuronal'=ifelse(ID %in% GO_neuronal$ID, 'Neuronal', 'Non-Neuronal'))
ASD_vs_CTL_SFARI = ASD_vs_CTL %>% filter(!is.na(`gene-score`)) %>%
mutate(Group = as.character(`gene-score`)) %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_Neuro = ASD_vs_CTL %>% mutate(Group = Neuronal) %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_all = ASD_vs_CTL %>% mutate(Group = 'All') %>% dplyr::select(-c(Neuronal,`gene-score`))
ASD_vs_CTL_together = bind_rows(ASD_vs_CTL_SFARI, ASD_vs_CTL_Neuro, ASD_vs_CTL_all) %>%
mutate(Group = ordered(Group, levels=c('1','2','3','4','5','6',
'Neuronal','Non-Neuronal','All'))) %>%
left_join(DE_info, by='ID')
return(ASD_vs_CTL_together)
}
ASD_vs_CTL = make_ASD_vs_CTL_df(datExpr)
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, mean, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, sd, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
There is heterocedasticity in the data with the variance increasing at a lower rate than the mean. The relation doesn’t seem to be completely linear, but close to.
ggplot(ASD_vs_CTL, aes(mean+1, sd+1)) + geom_point(alpha=0.1, aes(id=ID, fill=Group, color=Group)) +
scale_color_manual(values=gg_colour_hue(9)) + scale_fill_manual(values=gg_colour_hue(9)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') +
ggtitle(paste0('Corr=',round(cor(ASD_vs_CTL$mean, ASD_vs_CTL$sd), 2))) +
geom_smooth(method=lm, se=FALSE, color='#808080', size=0.5) + theme_minimal()
There doesn’t seem to be a clear pattern between difference in mean and SFARI score?
The higher the SFARI score, the smaller the log Fold Change between Autism and Control except for scores 4 and 6
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, abs(mean_diff), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, abs(logFC), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
# Variance stabilisation
LumiBatch_v = lumiT(LumiBatch_v, method = 'log2', ifPlot = TRUE)
## Perform log2 transformation ...
datExpr = exprs(LumiBatch_v)
# Perform Differential Expression Analysis
SAM_fit = SAM(datExpr, as.factor(datMeta$Disease.status), resp.type = 'Two class unpaired',
geneid = rownames(datExpr), random.seed = 1234, logged2 = TRUE, fdr.output = 1)
## perm= 1
## perm= 2
## perm= 3
## perm= 4
## perm= 5
## perm= 6
## perm= 7
## perm= 8
## perm= 9
## perm= 10
## perm= 11
## perm= 12
## perm= 13
## perm= 14
## perm= 15
## perm= 16
## perm= 17
## perm= 18
## perm= 19
## perm= 20
## perm= 21
## perm= 22
## perm= 23
## perm= 24
## perm= 25
## perm= 26
## perm= 27
## perm= 28
## perm= 29
## perm= 30
## perm= 31
## perm= 32
## perm= 33
## perm= 34
## perm= 35
## perm= 36
## perm= 37
## perm= 38
## perm= 39
## perm= 40
## perm= 41
## perm= 42
## perm= 43
## perm= 44
## perm= 45
## perm= 46
## perm= 47
## perm= 48
## perm= 49
## perm= 50
## perm= 51
## perm= 52
## perm= 53
## perm= 54
## perm= 55
## perm= 56
## perm= 57
## perm= 58
## perm= 59
## perm= 60
## perm= 61
## perm= 62
## perm= 63
## perm= 64
## perm= 65
## perm= 66
## perm= 67
## perm= 68
## perm= 69
## perm= 70
## perm= 71
## perm= 72
## perm= 73
## perm= 74
## perm= 75
## perm= 76
## perm= 77
## perm= 78
## perm= 79
## perm= 80
## perm= 81
## perm= 82
## perm= 83
## perm= 84
## perm= 85
## perm= 86
## perm= 87
## perm= 88
## perm= 89
## perm= 90
## perm= 91
## perm= 92
## perm= 93
## perm= 94
## perm= 95
## perm= 96
## perm= 97
## perm= 98
## perm= 99
## perm= 100
##
## Computing delta table
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
## 27
## 28
## 29
## 30
## 31
## 32
## 33
## 34
## 35
## 36
## 37
## 38
## 39
## 40
## 41
## 42
## 43
## 44
## 45
## 46
## 47
## 48
## 49
## 50
genes_up = SAM_fit$siggenes.table$genes.up %>% data.frame
genes_down = SAM_fit$siggenes.table$genes.lo %>% data.frame
DE_info = genes_up %>% bind_rows(genes_down)
maxFC = max(as.numeric(DE_info$Fold.Change[DE_info$Fold.Change!='Inf']))
DE_info = DE_info %>% mutate('logFC'=ifelse(Fold.Change=='Inf', log2(maxFC+1),
log2(as.numeric(Fold.Change)+1)),
'ID' = Gene.Name)
rm(SAM_fit, genes_up, genes_down, maxFC)
ASD_vs_CTL = make_ASD_vs_CTL_df(datExpr)
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, mean, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, sd, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
ggplot(ASD_vs_CTL, aes(mean, sd)) + geom_point(alpha=0.1, aes(id=ID, fill=Group, color=Group)) +
scale_color_manual(values=gg_colour_hue(9)) + scale_fill_manual(values=gg_colour_hue(9)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') +
ggtitle(paste0('Corr=',round(cor(ASD_vs_CTL$mean, ASD_vs_CTL$sd), 2))) +
geom_smooth(method=lm, se=FALSE, color='#808080', size=0.5) + theme_minimal()
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, abs(mean_diff), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, logFC, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
# Normalisation
LumiBatch_v = lumiN(LumiBatch_v, method='rsn')
## Perform rsn normalization ...
## 2019-05-08 15:46:34 , processing array 1
## 2019-05-08 15:46:34 , processing array 2
## 2019-05-08 15:46:34 , processing array 3
## 2019-05-08 15:46:34 , processing array 4
## 2019-05-08 15:46:34 , processing array 5
## 2019-05-08 15:46:34 , processing array 6
## 2019-05-08 15:46:35 , processing array 7
## 2019-05-08 15:46:35 , processing array 8
## 2019-05-08 15:46:35 , processing array 9
## 2019-05-08 15:46:35 , processing array 10
## 2019-05-08 15:46:35 , processing array 11
## 2019-05-08 15:46:35 , processing array 12
## 2019-05-08 15:46:35 , processing array 13
## 2019-05-08 15:46:35 , processing array 14
## 2019-05-08 15:46:35 , processing array 15
## 2019-05-08 15:46:35 , processing array 16
## 2019-05-08 15:46:35 , processing array 17
## 2019-05-08 15:46:36 , processing array 18
## 2019-05-08 15:46:36 , processing array 19
## 2019-05-08 15:46:36 , processing array 20
## 2019-05-08 15:46:36 , processing array 21
## 2019-05-08 15:46:36 , processing array 22
## 2019-05-08 15:46:36 , processing array 23
## 2019-05-08 15:46:36 , processing array 24
## 2019-05-08 15:46:36 , processing array 25
## 2019-05-08 15:46:36 , processing array 26
## 2019-05-08 15:46:36 , processing array 27
## 2019-05-08 15:46:36 , processing array 28
## 2019-05-08 15:46:36 , processing array 29
## 2019-05-08 15:46:37 , processing array 30
## 2019-05-08 15:46:37 , processing array 31
## 2019-05-08 15:46:37 , processing array 32
## 2019-05-08 15:46:37 , processing array 33
## 2019-05-08 15:46:37 , processing array 34
## 2019-05-08 15:46:37 , processing array 35
## 2019-05-08 15:46:37 , processing array 36
## 2019-05-08 15:46:37 , processing array 37
## 2019-05-08 15:46:37 , processing array 38
## 2019-05-08 15:46:38 , processing array 39
## 2019-05-08 15:46:38 , processing array 40
## 2019-05-08 15:46:38 , processing array 41
## 2019-05-08 15:46:38 , processing array 42
## 2019-05-08 15:46:38 , processing array 43
## 2019-05-08 15:46:38 , processing array 44
## 2019-05-08 15:46:38 , processing array 45
## 2019-05-08 15:46:38 , processing array 46
## 2019-05-08 15:46:38 , processing array 47
## 2019-05-08 15:46:38 , processing array 48
## 2019-05-08 15:46:38 , processing array 49
## 2019-05-08 15:46:38 , processing array 50
## 2019-05-08 15:46:39 , processing array 51
## 2019-05-08 15:46:39 , processing array 52
## 2019-05-08 15:46:39 , processing array 53
## 2019-05-08 15:46:39 , processing array 54
## 2019-05-08 15:46:39 , processing array 55
## 2019-05-08 15:46:39 , processing array 56
## 2019-05-08 15:46:39 , processing array 57
## 2019-05-08 15:46:39 , processing array 58
datExpr = exprs(LumiBatch_v)
# Perform Differential Expression Analysis
SAM_fit = SAM(datExpr, as.factor(datMeta$Disease.status), resp.type = 'Two class unpaired',
geneid = rownames(datExpr), random.seed = 1234, logged2 = TRUE, fdr.output = 1)
## perm= 1
## perm= 2
## perm= 3
## perm= 4
## perm= 5
## perm= 6
## perm= 7
## perm= 8
## perm= 9
## perm= 10
## perm= 11
## perm= 12
## perm= 13
## perm= 14
## perm= 15
## perm= 16
## perm= 17
## perm= 18
## perm= 19
## perm= 20
## perm= 21
## perm= 22
## perm= 23
## perm= 24
## perm= 25
## perm= 26
## perm= 27
## perm= 28
## perm= 29
## perm= 30
## perm= 31
## perm= 32
## perm= 33
## perm= 34
## perm= 35
## perm= 36
## perm= 37
## perm= 38
## perm= 39
## perm= 40
## perm= 41
## perm= 42
## perm= 43
## perm= 44
## perm= 45
## perm= 46
## perm= 47
## perm= 48
## perm= 49
## perm= 50
## perm= 51
## perm= 52
## perm= 53
## perm= 54
## perm= 55
## perm= 56
## perm= 57
## perm= 58
## perm= 59
## perm= 60
## perm= 61
## perm= 62
## perm= 63
## perm= 64
## perm= 65
## perm= 66
## perm= 67
## perm= 68
## perm= 69
## perm= 70
## perm= 71
## perm= 72
## perm= 73
## perm= 74
## perm= 75
## perm= 76
## perm= 77
## perm= 78
## perm= 79
## perm= 80
## perm= 81
## perm= 82
## perm= 83
## perm= 84
## perm= 85
## perm= 86
## perm= 87
## perm= 88
## perm= 89
## perm= 90
## perm= 91
## perm= 92
## perm= 93
## perm= 94
## perm= 95
## perm= 96
## perm= 97
## perm= 98
## perm= 99
## perm= 100
##
## Computing delta table
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
## 27
## 28
## 29
## 30
## 31
## 32
## 33
## 34
## 35
## 36
## 37
## 38
## 39
## 40
## 41
## 42
## 43
## 44
## 45
## 46
## 47
## 48
## 49
## 50
genes_up = SAM_fit$siggenes.table$genes.up %>% data.frame
genes_down = SAM_fit$siggenes.table$genes.lo %>% data.frame
DE_info = genes_up %>% bind_rows(genes_down)
maxFC = max(as.numeric(DE_info$Fold.Change[DE_info$Fold.Change!='Inf']))
DE_info = DE_info %>% mutate('logFC'=ifelse(Fold.Change=='Inf', log2(maxFC+1),
log2(as.numeric(Fold.Change)+1)),
'ID' = Gene.Name)
rm(SAM_fit, genes_up, genes_down, maxFC)
ASD_vs_CTL = make_ASD_vs_CTL_df(datExpr)
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, mean, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, sd, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
ggplot(ASD_vs_CTL, aes(mean, sd)) + geom_point(alpha=0.1, aes(id=ID, fill=Group, color=Group)) +
scale_color_manual(values=gg_colour_hue(9)) + scale_fill_manual(values=gg_colour_hue(9)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') +
ggtitle(paste0('Corr=',round(cor(ASD_vs_CTL$mean, ASD_vs_CTL$sd), 2))) +
geom_smooth(method=lm, se=FALSE, color='#808080', size=0.5) + theme_minimal()
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, abs(mean_diff), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, logFC, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
# Remove single batches
datMeta$Chip = sapply(datMeta$chip_array, function(x) substr(x,1,nchar(x)-2))
to_remove = datMeta$Chip == '4936551002'
datExpr = datExpr[,!to_remove]
datMeta = datMeta[!to_remove,]
# Batch Correction
mod = model.matrix(~Diagnosis_, data=datMeta)
batch = factor(datMeta$Chip)
datExpr = ComBat(datExpr, batch=batch, mod=mod)
## Standardizing Data across genes
rm(to_remove, mod, batch)
ASD_vs_CTL = make_ASD_vs_CTL_df(datExpr)
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, mean, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, sd, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Mean (left) and Standard Deviation (right) by score') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)
ggplot(ASD_vs_CTL, aes(mean, sd)) + geom_point(alpha=0.1, aes(id=ID, fill=Group, color=Group)) +
scale_color_manual(values=gg_colour_hue(9)) + scale_fill_manual(values=gg_colour_hue(9)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') +
ggtitle(paste0('Corr=',round(cor(ASD_vs_CTL$mean, ASD_vs_CTL$sd), 2))) +
geom_smooth(method=lm, se=FALSE, color='#808080', size=0.5) + theme_minimal()
p1 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, abs(mean_diff), fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
p2 = ggplotly(ggplot(ASD_vs_CTL, aes(Group, logFC, fill=Group)) +
geom_boxplot() + theme_minimal() +
ggtitle('Difference in mean (left) and log Fold Change (right) by Diagnosis') +
scale_fill_manual(values=gg_colour_hue(9)) +
theme(legend.position='none', axis.text.x=element_text(angle=45, hjust=1)))
subplot(p1, p2, nrows=1)
rm(p1, p2)